Submit two files that are labeled: YourLastName_Assignment_3 that have the following formats:
Formatting Python Code When programming in Python, refer to Kenneth Reitz’ PEP 8: The Style Guide for Python Code: http://pep8.org/ (Links to an external site.)Links to an external site. There is the Google style guide for Python at https://google.github.io/styleguide/pyguide.html (Links to an external site.)Links to an external site. Comment often and in detail.
In his first state of the uniion address , president Trump mentioned Chicago violance 10 times Trump's State of the Union Address
Columnist Clarence Page wrote an article , published by the Chicago Tribune stated that the city of Chicago had more homicides in the past two years than New York and Los Angeles combined
The CSV file for crimes dataset for the city of Chicago is obtained from the data portal for the city of Chicago. Here is the link for the city of Chicago data portal City of Chicago Data Portal
Three set of data are need for this assignment:
Complete description of the dataset can be found on Chicago city data portal.
Based on Trumps State of the Uniion Address and the article written by columnist Clarence Page and published by the Chicago Tribune, we are interested to retrieve the data for the past two years and perform different types of spatial queries.
There are few of these queries that we are interested in to help CPD and city of Chicago to plot on a Choroplteh map those districts that have highest gun crimes.
Here are examples of those types of queries:
Packages you need to Connect PostgreSQL server to load and retrieve Crhicago Crime dataset from the database:
Since we are using PostGIS in our work, please read and bookmark Chapter 4. Using PostGIS: Data Management and Queries
import sys
!{sys.executable} -m pip install folium
!{sys.executable} -m pip install area
!{sys.executable} -m pip install psycopg2-binary
import folium
from folium import plugins
from folium.plugins import MarkerCluster
import psycopg2
import csv
import pandas as pd
import json
from area import area
from psycopg2.extensions import ISOLATION_LEVEL_AUTOCOMMIT
#folium.__version__
# All data
db_connection = psycopg2.connect(host='129.105.208.229',dbname="chicago_crimes_ut", user="" , password="")
# ----------------------------------------------------------------
# full data
#db_connection = psycopg2.connect(host='129.105.208.229',dbname="chicago_crimes", user="YourNetID" , password="YourPassword")
# -----------------------------------------------------------------
cursor = db_connection.cursor()
The Crimes_2001_to_present.csv is downloaded from Chicago data portal and it has roughly 6.5 million records.
While working on this dataset, It is prudent to make a note of the following:
Sort algorithms used by the database engines vary in performance between O($N log N$) and O($ N^{2} $) where $N$ is the size of the number
Search algorithms used by the database engines vary in performance between O($log N$) and O($ N $) where $N$ is the size of the number
cursor.execute("SELECT district, count(district) \
FROM crimes \
GROUP BY district")
rows=cursor.fetchall()
# put returned data into a dataframe
crimes_per_district = pd.DataFrame(rows, columns=['dist_num','number_of_crimes'])
crimes_per_district['dist_num'] = crimes_per_district['dist_num'].astype(str)
crimes_per_district.head()
# set up map
total_number_of_crimes_per_district_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
# load in geo data from geojson file
# set map up with parameters
total_number_of_crimes_per_district_map.choropleth(geo_data="Boundaries.geojson",
fill_color='OrRd',
fill_opacity=0.5,
line_opacity=1,
data = crimes_per_district,
key_on='feature.properties.dist_num',
columns = ['dist_num', 'number_of_crimes'],
legend_name = ' Crimes per District')
# start with Police stations, but eliminate HQ
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)), district \
FROM police_stations \
WHERE district!='Headquarters'""")
police_stations = cursor.fetchall()
# loop through police stations
for police_station in police_stations:
police_station_location = (police_station[0],police_station[1])
cursor.execute("SELECT district, count(district) \
FROM crimes \
WHERE district= %s \
GROUP BY district",[police_station[2]])
districts_crime_numbers = cursor.fetchall()
for district in districts_crime_numbers:
folium.Marker(location = police_station_location,
popup = folium.Popup(html="District No : %s has Total Number of Crimes:%s"
%district ,max_width=450)).add_to(total_number_of_crimes_per_district_map)
# show the map
total_number_of_crimes_per_district_map
Well, we really need only the violent crimes per district, so we will filter only those crimes that we are interested in. Please note that we are not interested to plot property crimes, we are really after violent crimes and in particular Gun related crimes.
So for now, lets plot violent crimes on Choropleth map and later on we will filter only Gun related crimes
violent_crime_categories='THEFT','ASSAULT','ROBBERY','KIDNAPPING','CRIM SEXUAL ASSAULT','BATTERY','MURDER'
# isolate violent crime as defined above
cursor.execute("SELECT district, count(district)\
FROM crimes \
WHERE PRIMARY_TYPE in %s \
GROUP BY district",[violent_crime_categories])
rows=cursor.fetchall()
violent_crime_data=pd.DataFrame(rows, columns=['district_num','number_of_violent_crimes'])
violent_crime_data['district_num'] = violent_crime_data['district_num'].astype(str)
violent_crime_data
violent_crimes_per_district_map= folium.Map(location =(41.8781, -87.6298),zoom_start=11)
violent_crimes_per_district_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = violent_crime_data,
key_on='feature.properties.dist_num',
columns = ['district_num', 'number_of_violent_crimes'],
legend_name="Violent Crime Map"
)
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)), district \
FROM police_stations \
WHERE district!='Headquarters'""")
police_stations = cursor.fetchall()
for police_station in police_stations:
police_station_location =(police_station[0],police_station[1])
cursor.execute("SELECT PRIMARY_TYPE, count(PRIMARY_TYPE) \
FROM crimes where district = %s AND PRIMARY_TYPE in %s \
GROUP BY PRIMARY_TYPE",[police_station[2],violent_crime_categories])
data = cursor.fetchall()
violent_crimes_per_district_df = pd.DataFrame(data, columns=['Description', 'Number of Violent Crimes'])
header = violent_crimes_per_district_df.to_html(classes=
'table table-striped table-hover table-condensed table-responsive')
folium.Marker(location=police_station_location, popup=folium.Popup(html="District Number %s - Violent Crimes %s"
%(police_station[2],header))).add_to(violent_crimes_per_district_map)
violent_crimes_per_district_map
Lets first create a dataframe of gun crimes per district first to get an idea about the number of gun crimes per district
# isolate gun crime
gun='%GUN%'
cursor.execute("SELECT district, count(district)\
FROM crimes\
WHERE DESCRIPTION::text LIKE %s GROUP BY district",[gun])
districts_gun_violent_crimes = cursor.fetchall()
districts_gun_violent_crimes_df = pd.DataFrame(districts_gun_violent_crimes, columns=['dist_num','gun_crimes'])
districts_gun_violent_crimes_df['dist_num'] = districts_gun_violent_crimes_df['dist_num'].astype(str)
districts_gun_violent_crimes_df
districts_gun_violent_crimes_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
districts_gun_violent_crimes_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = districts_gun_violent_crimes_df,
key_on='feature.properties.dist_num',
columns = ['dist_num', 'gun_crimes'],
legend_name="Gun Crime"
)
Now, lets create a dataframe of the different types of gun crimes for every district and then plot it on Choropleth map
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
gun='%GUN%'
police_stations = cursor.fetchall()
for police_station in police_stations:
police_station_location = (police_station[0],police_station[1])
cursor.execute("""SELECT DESCRIPTION, count(DESCRIPTION) \
FROM crimes \
WHERE district=%s and DESCRIPTION::text LIKE %s GROUP BY DESCRIPTION""",[police_station[2],gun])
district_gun_violent_crimes=cursor.fetchall()
district_gun_violent_crimes_df=\
pd.DataFrame(district_gun_violent_crimes, columns=['Description', 'Number of Gun Crime'])
header = district_gun_violent_crimes_df.to_html(classes=
'table table-striped table-hover table-condensed table-responsive')
folium.Marker(location=police_station_location,popup=folium.Popup(html="District No: %s GUN_Crime: %s"
%(police_station[2],header) )).add_to(districts_gun_violent_crimes_map)
districts_gun_violent_crimes_map
district = []
tarea=[]
with open('Boundaries.geojson') as f:
data = json.load(f)
a = data['features']
for i in range(len(a)):
obj=a[i]['geometry']
n= a[i]['properties']
district.append(n['dist_num'])
tarea.append(area(obj)/10000)
af = pd.DataFrame({'dist_num': district,'district_area_inHectares':tarea})
af['dist_num'] = af['dist_num'].astype(str)
final_data= pd.merge(af, crimes_per_district, on='dist_num', how='inner')
final_data['crime_density'] = round(final_data['number_of_crimes']/(final_data['district_area_inHectares']/100))
final_data
gun_crime_arrests_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
gun_crime_arrests_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = districts_gun_violent_crimes_df, #can use data from #3
key_on='feature.properties.dist_num',
columns = ['dist_num', 'gun_crimes'],
legend_name="Gun Crime"
)
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
gun='%GUN%'
police_stations = cursor.fetchall()
marker_cluster = MarkerCluster().add_to(gun_crime_arrests_map)
for police_station in police_stations:
police_station_location = (police_station[0],police_station[1])
cursor.execute("""SELECT DISTINCT ON(caseno) caseno, block,DESCRIPTION, count(arrest), arrest,latitude,longitude\
FROM crimes\
WHERE district=%s and DESCRIPTION::text LIKE %s \
GROUP BY caseno,block, DESCRIPTION,arrest, latitude, longitude""",[police_station[2],gun])
crimes_per_district = cursor.fetchall()
for crime in crimes_per_district:
if crime[4]==True:
folium.Marker(location=(crime[5],crime[6]),popup=
folium.Popup(html="District No: %s <br> Description: %s <br> Block: %s"
%(police_station[2],crime[2],crime[1])),icon=folium.Icon(color='green',
icon='ok-sign'),).add_to(marker_cluster)
else:
folium.Marker(location=(crime[5],crime[6]),popup=folium.Popup(html=
"District No: %s <br> Description: %s<br> Block: %s"
%(police_station[2],crime[2],crime[1])),icon=folium.Icon(color='red',
icon='remove-sign'),).add_to(marker_cluster)
gun_crime_arrests_map
farthest_block_gun_crime_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
farthest_block_gun_crime_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = districts_gun_violent_crimes_df,
key_on='feature.properties.dist_num',
columns = ['dist_num', 'gun_crimes'],
legend_name="GUN CRIME"
)
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
police_stations = cursor.fetchall()
gun='%GUN%'
for police_station in police_stations:
cursor.execute("""SELECT DISTINCT on (A.block) A.district,A.block, A.where_is,ST_Distance(A.where_is,B.where_is)\
FROM crimes as A, police_stations as B
WHERE A.district=%s and DESCRIPTION::text LIKE %s and B.district= %s
and ST_Distance(A.where_is,B.where_is)
IN
( SELECT dist
FROM
(SELECT ST_Distance(A.where_is,B.where_is) as dist
FROM crimes as A, police_stations as B
WHERE A.district=%s and DESCRIPTION::text LIKE %s and B.district= %s ) as f)""",
[police_station[2],gun,police_station[2],police_station[2],gun, police_station[2]])
farthest_block_gun_crime = cursor.fetchall()
cursor.execute("SELECT ST_X(ST_AsText(%s)), ST_Y(ST_AsText(%s))"
,(farthest_block_gun_crime[0][2],farthest_block_gun_crime[0][2]))
farthest_block_gun_crime_location = cursor.fetchall()
folium.Marker(location=(police_station[0],police_station[1]),popup=
folium.Popup(html="Police Station <br> District No.:%s <br> Farthest Gun_Crime Block:%s"%\
(farthest_block_gun_crime[0][0],farthest_block_gun_crime[0][1]))).add_to(farthest_block_gun_crime_map)
folium.CircleMarker(farthest_block_gun_crime_location[0],radius=5,color='#ff3187',popup=
folium.Popup(html="District No.:%s <br> Block:%s"\
%(farthest_block_gun_crime[0][0],farthest_block_gun_crime[0][1]))).add_to(farthest_block_gun_crime_map)
farthest_block_gun_crime_map
The HTML document your are submitting must have the source code and the output for the following requirements
gun='%GUN%'
#execute query to get all occurrences
cursor.execute("""SELECT district,Block, count(Block)\
FROM crimes\
WHERE DESCRIPTION::text LIKE %s \
GROUP BY district, Block""", [gun])
max_gun_crimes = cursor.fetchall()
# converted to datframe
max_gun_crimes_df= pd.DataFrame(max_gun_crimes, columns=['district', 'Block','maxBlock'])
max_gun_crimes_df = max_gun_crimes_df.sort_values('maxBlock', ascending =False)
max_gun_crimes_df = max_gun_crimes_df.drop_duplicates(subset='district', keep='first', inplace=False)
max_gun_crimes_df = max_gun_crimes_df.sort_values('district', ascending = True)
max_gun_crimes_df['district'] = max_gun_crimes_df['district'].astype(str)
max_gun_crimes_df
As one can see from the table above the Block 071XX S WABASH AVE in Districts 11, 009XX N HAMLIN AVE in District 11, 015XX S KOLIN AVE in district 10 and 078XX S ASHLAND AVE in district 6 have a max count of 3 gun crimes.
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
police_stations = cursor.fetchall()
police_stations_df = pd.DataFrame(police_stations, columns=['lat', 'long', 'district'])
max_crimes_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
max_crimes_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = max_gun_crimes_df,
key_on='feature.properties.dist_num',
columns = ['district', 'maxBlock'],
legend_name="UNLAWFUL POSS OF HANDGUN"
)
gun='%GUN%'
#did a loop to only print out one marker per district
for district in max_gun_crimes_df['district'].unique():
maxheader = max_gun_crimes_df[max_gun_crimes_df['district'] == district].to_html(classes=
'table table-striped table-hover table-condensed table-responsive')
#had to convert to int64 type to compare
locbool = police_stations_df['district'].astype('int64') == float(district)
#added marker to location based on police lat and long, then added subset of datframe using maxheader
folium.Marker(
location=(police_stations_df[locbool]['lat'], police_stations_df[locbool]['long']),
popup=folium.Popup(html="District Number %s - Max Crimes %s"
%(int(district), maxheader))).add_to(max_crimes_map)
max_crimes_map
district = []
tarea = []
#Helps Find Area
with open('Boundaries.geojson') as f:
data = json.load(f)
a = data['features']
for i in range(len(a)):
obj=a[i]['geometry']
n= a[i]['properties']
district.append(n['dist_num'])
tarea.append(area(obj)/10000)
#Make Dataframe out of Dict
af = pd.DataFrame({'dist_num': district,'district_area_inHectares':tarea})
af['dist_num'] = af['dist_num'].astype(str)
#Merge Gun Data Set with Dataset just created
final_data= pd.merge(af, districts_gun_violent_crimes_df , on='dist_num', how='inner')
#Calculate density
final_data['gun_density'] = round(final_data['gun_crimes']/(final_data['district_area_inHectares']/100))
final_data
gun='%UNLAWFUL POSS OF HANDGUN%'
cursor.execute("""SELECT district, count(district) \
FROM crimes\
WHERE DESCRIPTION::text LIKE %s\
GROUP BY district""", [gun])
farthest_block = cursor.fetchall()
districts_farthest_gun_violent_crimes_df = pd.DataFrame(farthest_block, columns=['district_num', 'block_guns'])
districts_farthest_gun_violent_crimes_df['district_num'] = districts_farthest_gun_violent_crimes_df['district_num'].astype(str)
farthest_block_unlawful_poss_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
farthest_block_unlawful_poss_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = districts_farthest_gun_violent_crimes_df,
key_on='feature.properties.dist_num',
columns = ['district_num', 'block_guns'],
legend_name="UNLAWFUL POSS OF HANDGUN"
)
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
police_stations = cursor.fetchall()
for police_station in police_stations:
#Tried Try Block as some queries do not return data from a district
#Changed description to what was asked
try:
cursor.execute("""SELECT DISTINCT on (A.block) A.district,A.block, A.where_is,ST_Distance(A.where_is,B.where_is)\
FROM crimes as A, police_stations as B
WHERE A.district=%s and DESCRIPTION = 'UNLAWFUL POSS OF HANDGUN' and B.district= %s
and ST_Distance(A.where_is,B.where_is)
IN
( SELECT max(dist)
FROM
(SELECT ST_Distance(A.where_is,B.where_is) as dist
FROM crimes as A, police_stations as B
WHERE A.district=%s and DESCRIPTION='UNLAWFUL POSS OF HANDGUN' and B.district= %s) as f)""",
[police_station[2], police_station[2],police_station[2], police_station[2]])
farthest_block_unlawful_poss = cursor.fetchall()
cursor.execute("SELECT ST_X(ST_AsText(%s)), ST_Y(ST_AsText(%s))"
,(farthest_block_unlawful_poss[0][2],farthest_block_unlawful_poss[0][2]))
farthest_block_unlawful_poss_location = cursor.fetchall()
except IndexError:
continue
folium.Marker(location=(police_station[0],police_station[1]),popup=
folium.Popup(html="Police Station <br> District No.:%s <br> Farthest Possession_Crime Block:%s"%\
(farthest_block_unlawful_poss[0][0],farthest_block_unlawful_poss[0][1]))).add_to(farthest_block_unlawful_poss_map)
folium.CircleMarker(farthest_block_unlawful_poss_location[0],radius=5,color='#ff3187',popup=
folium.Popup(html="District No.:%s <br> Block:%s"\
%(farthest_block_unlawful_poss[0][0],farthest_block_unlawful_poss[0][1]))).add_to(farthest_block_unlawful_poss_map)
farthest_block_unlawful_poss_map
gun_crime_arrests_map = folium.Map(location =(41.8781, -87.6298),zoom_start=11)
gun_crime_arrests_map.choropleth(geo_data="Boundaries.geojson",
fill_color='YlOrRd',
fill_opacity=0.5,
line_opacity=1,
data = districts_gun_violent_crimes_df, #can use data from #3
key_on='feature.properties.dist_num',
columns = ['dist_num', 'gun_crimes'],
legend_name="Gun Related Crimes"
)
cursor.execute("""SELECT ST_X(ST_AsText(Where_IS)), ST_Y(ST_AsText(Where_IS)),
district from police_stations where district!='Headquarters'""")
police_stations = cursor.fetchall()
marker_cluster = MarkerCluster().add_to(gun_crime_arrests_map)
gun ='%GUN%' #find gun phrase
for police_station in police_stations:
police_station_location = (police_station[0],police_station[1])
gun ='%GUN%'
cursor.execute("""SELECT DISTINCT ON(caseno) caseno, block,DESCRIPTION, count(arrest), arrest,latitude,longitude, LOCATION_DESCRIPTION\
FROM crimes\
WHERE district=%s and DESCRIPTION::text LIKE %s \
GROUP BY caseno,block, DESCRIPTION,arrest, latitude, longitude, LOCATION_DESCRIPTION""",[police_station[2],gun])
crimes_per_district = cursor.fetchall()
#print(crimes_per_district)
for crime in crimes_per_district:
#Condition changed to find residence location and index was 7 out of the tuple,
if "RESIDENCE" in crime[7]:
folium.Marker(location=(crime[5],crime[6]),popup=
folium.Popup(html="District No: %s <br> Description: %s <br> Block: %s <br> Location Desciption: %s"
%(police_station[2],crime[2],crime[1], crime[7])),icon=folium.Icon(color='green',
icon='ok-sign'),).add_to(marker_cluster)
else:
#Changed condition to find street location
if "STREET"in crime[7]:
folium.Marker(location=(crime[5],crime[6]),popup=folium.Popup(html=
"District No: %s <br> Description: %s<br> Block: %s <br> Location Desciption: %s"
%(police_station[2],crime[2],crime[1], crime[7])),icon=folium.Icon(color='red',
icon='remove-sign'),).add_to(marker_cluster)
gun_crime_arrests_map